/*Author: Ilana M Ventura														  */
/*Date: September 2024															  */
/*Step 5: Table S3. Logistic regressions predicting Black and Indigenous 	 	  */
/*		  			Identification among self-identified Hispanics				  */



/*Table S3. Logistic regressions predicting Black and Indigenous Identification among self-identified Hispanics*/
/* Create interaction terms.
   Each *_2030 variable is 1 when filter_rand = 1 AND the matching base
   indicator equals 1; in every other case (including missing values) it is 0. */
data sl_dat_or;
	set sl_dat;

	/* A SAS comparison evaluates to 1 (true) or 0 (false), so the combined
	   condition can be assigned directly to the flag. */
	skin_med_2030       = (filter_rand = 1 and skin_med       = 1);
	skin_dark_2030      = (filter_rand = 1 and skin_dark      = 1);
	Orig_PR_2030        = (filter_rand = 1 and Orig_PR        = 1);
	Orig_Cuba_2030      = (filter_rand = 1 and Orig_Cuba      = 1);
	Orig_Other_2030     = (filter_rand = 1 and Orig_Other     = 1);
	Discrimination_2030 = (filter_rand = 1 and Discrimination = 1);
	R_US_Born_2030      = (filter_rand = 1 and R_US_Born      = 1);
run;

/* Sanity check: list every observed combination of filter_rand, the base
   indicators, and the interaction flags derived from them (missing values
   are shown as their own level). */
proc freq data=sl_dat_or;
	/* skin colour */
	tables filter_rand*skin_med*skin_light*skin_dark*skin_med_2030*skin_dark_2030
		/ list missing norow nocol;
	/* national origin */
	tables filter_rand*Orig_PR*Orig_Cuba*Orig_Other*Orig_PR_2030*Orig_Cuba_2030*Orig_Other_2030
		/ list missing norow nocol;
	/* discrimination */
	tables filter_rand*Discrimination*Discrimination_2030
		/ list missing norow nocol;
	/* nativity */
	tables filter_rand*R_US_Born*R_US_Born_2030
		/ list missing norow nocol;
run;


/* Set the labels used later to group predicted probabilities.
   Each p_* variable combines a category with the year tag that goes with
   filter_rand (1 -> "2030", 2 -> "2020"). Within a group the assignments run
   top to bottom, so when several indicators equal 1 the last one wins. */
data sl_dat_or;
	set sl_dat_or;
	length p_skincolor p_Origin p_discrim p_nativity $80.;

	if filter_rand = 1 then do;
		if skin_light = 1 then p_skincolor = "Skin Color: Light, 2030";
		if skin_med   = 1 then p_skincolor = "Skin Color: Medium, 2030";
		if skin_dark  = 1 then p_skincolor = "Skin Color: Dark, 2030";

		if Orig_PR    = 1 then p_Origin = "Origin: PR, 2030";
		if Orig_Cuba  = 1 then p_Origin = "Origin: Cuba, 2030";
		if Orig_Other = 1 then p_Origin = "Origin: Other, 2030";
		if Orig_Mex   = 1 then p_Origin = "Origin: Mexico, 2030";

		if Discrimination = 1 then p_discrim = "Discrimination, 2030";
		if Discrimination = 0 then p_discrim = "No Discrimination, 2030";

		if R_US_Born = 1 then p_nativity = "US Born, 2030";
		if R_US_Born = 0 then p_nativity = "Foreign Born, 2030";
	end;
	else if filter_rand = 2 then do;
		if skin_light = 1 then p_skincolor = "Skin Color: Light, 2020";
		if skin_med   = 1 then p_skincolor = "Skin Color: Medium, 2020";
		if skin_dark  = 1 then p_skincolor = "Skin Color: Dark, 2020";

		if Orig_PR    = 1 then p_Origin = "Origin: PR, 2020";
		if Orig_Cuba  = 1 then p_Origin = "Origin: Cuba, 2020";
		if Orig_Other = 1 then p_Origin = "Origin: Other, 2020";
		if Orig_Mex   = 1 then p_Origin = "Origin: Mexico, 2020";

		if Discrimination = 1 then p_discrim = "Discrimination, 2020";
		if Discrimination = 0 then p_discrim = "No Discrimination, 2020";

		if R_US_Born = 1 then p_nativity = "US Born, 2020";
		if R_US_Born = 0 then p_nativity = "Foreign Born, 2020";
	end;

	/* Correction: blank the origin label when more than one of the PR, Cuba,
	   and Mexico indicators is set. Orig_Other is not part of this check. */
	if sum(Orig_PR, Orig_Cuba, Orig_Mex) > 1 then p_Origin = "";
run;

/* Sanity check: list each label next to the variables it was built from. */
proc freq data=sl_dat_or;
	tables p_skincolor*filter_rand*skin_med*skin_light*skin_dark
		/ list missing norow nocol;
	tables p_Origin*filter_rand*Orig_PR*Orig_Cuba*Orig_Mex
		/ list missing norow nocol;
	tables p_discrim*filter_rand*Discrimination
		/ list missing norow nocol;
	tables p_nativity*filter_rand*R_US_Born
		/ list missing norow nocol;
run;


/****output****/

/* Weighted logistic regression of `black` on filter_rand, the national-origin
   indicators, and their *_2030 interaction flags, restricted to hisp = 1.
   No Orig_Mex term is in the model; the title names Mexican as the comparison.
   Predicted probabilities with confidence limits go to p_out_Origin, and the
   ODS tables are captured as Mod_*_NatOrig datasets for later table building. */
proc logistic data=sl_dat_or descending;
	title "National Origin (vs Mexican)";
	where hisp = 1;
	weight weight_genB_13;
	class filter_rand / param=ref;
	model black = filter_rand
	              Orig_PR      Orig_Cuba      Orig_Other
	              Orig_PR_2030 Orig_Cuba_2030 Orig_Other_2030
	              / rsquare;
	output out=p_out_Origin predicted=est_response upper=U lower=L;
	ods output
		NObs               = Mod_N_NatOrig
		FitStatistics      = Mod_fit_NatOrig
		RSquare            = Mod_Rsq_NatOrig
		ParameterEstimates = Mod_est_NatOrig
		OddsRatios         = Mod_odds_NatOrig
		ModelInfo          = Mod_Info_NatOrig
		Association        = Mod_Ascn_NatOrig;
run;


/* List the distinct predicted probability and confidence limits for each
   origin label (rows with a blank label are excluded) and save the listing
   as int_mod_origin. */
proc freq data=p_out_Origin;
	title "Origin";
	where p_Origin ne "";
	tables filter_rand*p_Origin*est_response*l*u
		/ list missing norow nocol nocum out=int_mod_origin;
run;



/* Weighted logistic regression of `black` on filter_rand, the medium and dark
   skin-colour indicators, and their *_2030 interaction flags, restricted to
   hisp = 1. skin_light is not in the model; the first title names Light as
   the comparison. Predicted probabilities go to p_out_SkinColor and the ODS
   tables are captured as Mod_*_SkinColor datasets. */
proc logistic data=sl_dat_or descending;
	where hisp = 1;
	weight weight_genB_13;
	class filter_rand / param=ref;
	model black = filter_rand
	              skin_med      skin_dark
	              skin_med_2030 skin_dark_2030
	              / rsquare;
	title "Skin Color (vs Light)";
	output out=p_out_SkinColor predicted=est_response upper=U lower=L;
	ods output
		NObs               = Mod_N_SkinColor
		FitStatistics      = Mod_fit_SkinColor
		RSquare            = Mod_Rsq_SkinColor
		ParameterEstimates = Mod_est_SkinColor
		OddsRatios         = Mod_odds_SkinColor
		ModelInfo          = Mod_Info_SkinColor
		Association        = Mod_Ascn_SkinColor;
	/* NOTE(review): this second TITLE is issued in the same step as the one
	   above and presumably replaces it -- confirm which text is intended. */
	title "Skin Color";
run;
	 

/* List the distinct predicted probability and confidence limits for each
   skin-colour label and save the listing as int_mod_skin.
   The filter is on p_skincolor, the variable being tabulated. It previously
   tested p_Origin, which dropped rows with a blank origin label and let rows
   with a blank skin-colour label into the output dataset. */
proc freq data= p_out_SkinColor;
	where p_skincolor ne "";
	table p_skincolor*est_response*l*u/ list missing norow nocol nocum out=int_mod_skin;
	title "Skin Color";
run;

	 
/* Weighted logistic regression of `black` on filter_rand, R_US_Born, and the
   R_US_Born_2030 interaction flag, restricted to hisp = 1.
   p_out_Nativity receives the predicted probability, the standard error of
   the linear predictor (sterr), and the confidence limits; the ODS tables are
   captured as Mod_*_USBorn datasets. */
proc logistic data=sl_dat_or descending;
	title "US Born";
	where hisp = 1;
	weight weight_genB_13;
	class filter_rand / param=ref;
	model black = filter_rand
	              R_US_Born
	              R_US_Born_2030
	              / rsquare;
	output out=p_out_Nativity predicted=est_response stdxbeta=sterr upper=U lower=L;
	ods output
		NObs               = Mod_N_USBorn
		FitStatistics      = Mod_fit_USBorn
		RSquare            = Mod_Rsq_USBorn
		ParameterEstimates = Mod_est_USBorn
		OddsRatios         = Mod_odds_USBorn
		ModelInfo          = Mod_Info_USBorn
		Association        = Mod_Ascn_USBorn;
run;


/* List the distinct predicted probability and confidence limits for each
   nativity label (blank labels excluded) and save the listing as
   int_mod_Nativity. No TITLE is set here, so the one already in effect is used. */
proc freq data=p_out_Nativity;
	where p_nativity ne "";
	tables p_nativity*est_response*l*u
		/ list missing norow nocol nocum out=int_mod_Nativity;
run;



/*
 Macro LOOP: format the saved PROC LOGISTIC ODS tables of each model into one
 display column.

 Parameter
   varlist  pipe-delimited list of model suffixes. For every suffix S the
            datasets Mod_est_S, Mod_fit_S, Mod_N_S, Mod_Rsq_S, Mod_Info_S and
            Mod_Ascn_S must already exist.

 Result (per suffix S)
   Mod_out_S with a character column Model_S holding, for each parameter, the
   rounded estimate plus significance marker and, on the next row, the
   standard error in parentheses, followed by the selected fit rows.
   Work datasets Mod, Mod_t and Mod_n are overwritten on every pass, and
   Mod_Info_S is reduced to its "Response Variable" row.
*/
%macro loop(varlist);
%let i=1;
%do %while (%scan(&varlist, &i, |) ^=%str());
%let var=%scan(&varlist, &i, |);
%put &var;

%let model= Mod_est_&var;
%let fit= Mod_fit_&var;
%let num= Mod_N_&var;
%let rsq= Mod_Rsq_&var;
%let VarName= Mod_Info_&var;
%let ascn= Mod_Ascn_&var;

/* Read the response-variable name from ModelInfo. SYMPUTX (instead of SYMPUT)
   stores it without trailing blanks. */
Data &VarName;
	set &VarName;
	if description="Response Variable";
	CALL SYMPUTX('DepVar',value) ;
run;
%put &DepVar &var;

/* Log the dataset names used for this pass. */
%put &model &fit &num &rsq ;

/* Parameter estimates: add the significance marker and a row counter. */
Data Mod;
	set &model ;
	length stars $5. ClassVal0 $30.;
	/* ProbChiSq is compared as a number (no STRIP round trip through text).
	   A missing p-value sorts below every number in SAS, so it is tested
	   first; otherwise it would be starred "***". */
	if missing(ProbChiSq) then stars="";
		else if ProbChiSq <.0001 then stars="***";
		else if ProbChiSq <.001 then stars="**";
		else if ProbChiSq <.05 then stars="*";
		/* p < .1 marker, so every column that ends up in the combined table
		   uses the same set of significance markers. */
		else if ProbChiSq <.1 then stars="†";
	n+1;
	if ClassVal0="" then ClassVal0="";
	keep n Variable estimate StdErr stars   ClassVal0 ;
run;

/* One row for the estimate and one for the standard error of each parameter. */
proc transpose data= Mod out=Mod_t;
	var estimate StdErr;
	by n Variable stars  ClassVal0;
run;

/* Build the display text. Standard-error rows get a blank Variable and sort
   key n + .5 so they sit directly under their estimate. */
Data Mod_t;
	set Mod_t;
	col1_a=round(col1,.001);
	Variable_merge=Variable;
	If _NAME_= "Estimate" then Model_&var=cats(col1_a,stars);
		else if  _NAME_= "StdErr" then do;
			Model_&var=cats("(",col1_a,")");
			Variable="";
		end;
	if _NAME_= "StdErr" then n2=n+.5;
		else n2=n;
	drop _LABEL_  COL1 stars col1_a n;
run;

/* Fit rows: stack the NObs, FitStatistics, RSquare and Association tables,
   using one label column of each as Variable, and keep only the rows selected
   by the subsetting IF. All fit rows share sort key n2 = 40. */
Data Mod_n;
	length Model_&var $8.;
	set &num (rename=(Label= Variable  ))
		&fit (rename=(Criterion= Variable))
		&rsq (rename=(Label2=Variable))
		&ascn(rename=(Label1=Variable));
	if Variable in ("Number of Observations Used", "AIC", "Percent Concordant") or Label1 in ("R-Square");

	if Model_&var= "" then do;
		if InterceptAndCovariates ne . then Model_&var=InterceptAndCovariates;
		else if Variable ="Max-rescaled R-Square" and cValue2 ne . then Model_&var=cValue2;
		else if NobsUsed ne . then  Model_&var=NobsUsed;
		else if Variable ="Percent Concordant" and cvalue1 ne . then  Model_&var=cvalue1;
	end;
	n2=40;
	keep Variable Model_&var _name_ variable_merge n2;
run;

/* Parameter rows followed by fit rows. */
Data Mod_out_&Var;
 	length Variable  $200.;
	set Mod_t Mod_n;
	/* rename Model_&var = &DepVar._&var; (disabled) */
run;

/* Increment counter */
%let i=%eval(&i+1);
%end;
%mend;
%let temp=NatOrig|SkinColor|USBorn;
%loop(&temp);


/* Reorder to merge: shift the sort key n2 of every parameter row other than
   Intercept and filter_rand, so that each model's covariates fall in their own
   range when the three model outputs are merged by n2. Fit rows (n2 = 40) are
   left where they are. */
data Mod_out_NatOrig;
	set Mod_out_NatOrig;
	if n2 < 40 and not (variable_merge in ("Intercept", "filter_rand"))
		then n2 = n2 + 4;
run;

data Mod_out_SkinColor;
	set Mod_out_SkinColor;
	if n2 < 40 and not (variable_merge in ("Intercept", "filter_rand"))
		then n2 = n2 + 15;
run;



/* Sort each model output by the merge keys (in place). */
proc sort data=Mod_out_NatOrig;
	by n2 variable_merge _name_ Variable classval0;
run;

proc sort data=Mod_out_SkinColor;
	by n2 variable_merge _name_ Variable classval0;
run;

proc sort data=Mod_out_USBorn;
	by n2 variable_merge _name_ Variable classval0;
run;

/* Put the three model columns side by side. The key variables are kept
   (the DROP below is intentionally disabled). */
data mod_all;
	merge Mod_out_NatOrig
	      Mod_out_USBorn
	      Mod_out_SkinColor;
	by n2 variable_merge _name_ Variable classval0;
	/* drop n2 variable_merge _name_  classval0; */
run;

proc print data=mod_all noobs;
run;


/*******pred probs******/

/* Stack the three predicted-probability listings and split each label into
   its parts:
     pred  which listing the row came from
     int   label text before the comma (after the colon, when there is one)
     year  label text after the comma
     type  "Separate" when year is 2020, "Combined" when year is 2030
   Only int, year, pred, est_response, l, u and type are written out. */
data out.pred_interactions (keep=int year pred est_response l u type);
	set int_mod_origin   (in=from_origin)
	    int_mod_Skin     (in=from_skin)
	    int_mod_Nativity (in=from_nativity);
	length pred $20.;

	if from_origin then Pred="Origin";
	else if from_skin then Pred="Skin color";
	else if from_nativity then Pred="Nativity";

	/* Take the first non-blank label. */
	if not missing(p_skincolor) then group=p_skincolor;
	else if not missing(p_Origin) then group=p_Origin;
	else if not missing(p_nativity) then group=p_nativity;

	/* Text after the colon when the label has one, otherwise the whole label. */
	group2=scan(group, 2, ':');
	if not missing(group2) then group3=group2;
	else group3=group;

	int =scan(group3, 1, ',');
	year=scan(group3, 2, ',');

	if year=2020 then type="Separate";
	else if year=2030 then type="Combined";
run;

/*************Same models, predicting Indigenous (AmIndAN) identification*************/


/* Weighted logistic regression of AmIndAN on filter_rand, the national-origin
   indicators, and their *_2030 interaction flags, restricted to hisp = 1.
   No Orig_Mex term is in the model; the title names Mexican as the comparison.
   Predicted probabilities go to p_out_OriginInd and the ODS tables are
   captured as Mod_*_NatOrigInd datasets. */
proc logistic data=sl_dat_or descending;
	title "National Origin (vs Mexican)";
	where hisp = 1;
	weight weight_genB_13;
	class filter_rand / param=ref;
	model AmIndAN = filter_rand
	                Orig_PR      Orig_Cuba      Orig_Other
	                Orig_PR_2030 Orig_Cuba_2030 Orig_Other_2030
	                / rsquare;
	output out=p_out_OriginInd predicted=est_response upper=U lower=L;
	ods output
		NObs               = Mod_N_NatOrigInd
		FitStatistics      = Mod_fit_NatOrigInd
		RSquare            = Mod_Rsq_NatOrigInd
		ParameterEstimates = Mod_est_NatOrigInd
		OddsRatios         = Mod_odds_NatOrigInd
		ModelInfo          = Mod_Info_NatOrigInd
		Association        = Mod_Ascn_NatOrigInd;
run;


/* List the distinct predicted probability and confidence limits for each
   origin label (blank labels excluded) and save the listing as
   int_mod_originInd. */
proc freq data=p_out_OriginInd;
	title "Origin";
	where p_Origin ne "";
	tables filter_rand*p_Origin*est_response*l*u
		/ list missing norow nocol nocum out=int_mod_originInd;
run;


/* Weighted logistic regression of AmIndAN on filter_rand, the medium and dark
   skin-colour indicators, and their *_2030 interaction flags, restricted to
   hisp = 1. skin_light is not in the model; the first title names Light as
   the comparison. Predicted probabilities go to p_out_SkinColorInd and the
   ODS tables are captured as Mod_*_SkinColorInd datasets. */
proc logistic data=sl_dat_or descending;
	where hisp = 1;
	weight weight_genB_13;
	class filter_rand / param=ref;
	model AmIndAN = filter_rand
	                skin_med      skin_dark
	                skin_med_2030 skin_dark_2030
	                / rsquare;
	title "Skin Color (vs Light)";
	output out=p_out_SkinColorInd predicted=est_response upper=U lower=L;
	ods output
		NObs               = Mod_N_SkinColorInd
		FitStatistics      = Mod_fit_SkinColorInd
		RSquare            = Mod_Rsq_SkinColorInd
		ParameterEstimates = Mod_est_SkinColorInd
		OddsRatios         = Mod_odds_SkinColorInd
		ModelInfo          = Mod_Info_SkinColorInd
		Association        = Mod_Ascn_SkinColorInd;
	/* NOTE(review): this second TITLE is issued in the same step as the one
	   above and presumably replaces it -- confirm which text is intended. */
	title "Skin Color";
run;
	 

/* List the distinct predicted probability and confidence limits for each
   skin-colour label (blank labels excluded) and save the listing as
   int_mod_skinInd. */
proc freq data=p_out_SkinColorInd;
	title "Skin Color";
	where p_SkinColor ne "";
	tables p_SkinColor*est_response*l*u
		/ list missing norow nocol nocum out=int_mod_skinInd;
run;


	 
/* Weighted logistic regression of AmIndAN on filter_rand, R_US_Born, and the
   R_US_Born_2030 interaction flag, restricted to hisp = 1.
   Predicted probabilities go to p_out_NativityInd and the ODS tables are
   captured as Mod_*_USBornInd datasets. */
proc logistic data=sl_dat_or descending;
	title "US Born";
	where hisp = 1;
	weight weight_genB_13;
	class filter_rand / param=ref;
	model AmIndAN = filter_rand
	                R_US_Born
	                R_US_Born_2030
	                / rsquare;
	output out=p_out_NativityInd predicted=est_response upper=U lower=L;
	ods output
		NObs               = Mod_N_USBornInd
		FitStatistics      = Mod_fit_USBornInd
		RSquare            = Mod_Rsq_USBornInd
		ParameterEstimates = Mod_est_USBornInd
		OddsRatios         = Mod_odds_USBornInd
		ModelInfo          = Mod_Info_USBornInd
		Association        = Mod_Ascn_USBornInd;
run;


/* List the distinct predicted probability and confidence limits for each
   nativity label (blank labels excluded) and save the listing as
   int_mod_NativityInd. No TITLE is set here, so the one already in effect
   is used. */
proc freq data=p_out_NativityInd;
	where p_nativity ne "";
	tables p_nativity*est_response*l*u
		/ list missing norow nocol nocum out=int_mod_NativityInd;
run;



/*
 Macro LOOP: format the saved PROC LOGISTIC ODS tables of each model into one
 display column.

 Parameter
   varlist  pipe-delimited list of model suffixes. For every suffix S the
            datasets Mod_est_S, Mod_fit_S, Mod_N_S, Mod_Rsq_S, Mod_Info_S and
            Mod_Ascn_S must already exist.

 Result (per suffix S)
   Mod_out_S with a character column Model_S holding, for each parameter, the
   rounded estimate plus significance marker and, on the next row, the
   standard error in parentheses, followed by the selected fit rows.
   Work datasets Mod, Mod_t and Mod_n are overwritten on every pass, and
   Mod_Info_S is reduced to its "Response Variable" row.
*/
%macro loop(varlist);
%let i=1;
%do %while (%scan(&varlist, &i, |) ^=%str());
%let var=%scan(&varlist, &i, |);
%put &var;

%let model= Mod_est_&var;
%let fit= Mod_fit_&var;
%let num= Mod_N_&var;
%let rsq= Mod_Rsq_&var;
%let VarName= Mod_Info_&var;
%let ascn= Mod_Ascn_&var;

/* Read the response-variable name from ModelInfo. SYMPUTX (instead of SYMPUT)
   stores it without trailing blanks. */
Data &VarName;
	set &VarName;
	if description="Response Variable";
	CALL SYMPUTX('DepVar',value) ;
run;
%put &DepVar &var;

/* Log the dataset names used for this pass. */
%put &model &fit &num &rsq ;

/* Parameter estimates: add the significance marker and a row counter. */
Data Mod;
	set &model ;
	length stars $5. ClassVal0 $30.;
	/* ProbChiSq is compared as a number (no STRIP round trip through text).
	   A missing p-value sorts below every number in SAS, so it is tested
	   first; otherwise it would be starred "***". */
	if missing(ProbChiSq) then stars="";
		else if ProbChiSq <.0001 then stars="***";
		else if ProbChiSq <.001 then stars="**";
		else if ProbChiSq <.05 then stars="*";
		else if ProbChiSq <.1 then stars="†";
	n+1;
	if ClassVal0="" then ClassVal0="";
	keep n Variable estimate StdErr stars   ClassVal0 ;
run;

/* One row for the estimate and one for the standard error of each parameter. */
proc transpose data= Mod out=Mod_t;
	var estimate StdErr;
	by n Variable stars  ClassVal0;
run;

/* Build the display text. Standard-error rows get a blank Variable and sort
   key n + .5 so they sit directly under their estimate. */
Data Mod_t;
	set Mod_t;
	col1_a=round(col1,.001);
	Variable_merge=Variable;
	If _NAME_= "Estimate" then Model_&var=cats(col1_a,stars);
		else if  _NAME_= "StdErr" then do;
			Model_&var=cats("(",col1_a,")");
			Variable="";
		end;
	if _NAME_= "StdErr" then n2=n+.5;
		else n2=n;
	drop _LABEL_  COL1 stars col1_a n;
run;

/* Fit rows: stack the NObs, FitStatistics, RSquare and Association tables,
   using one label column of each as Variable, and keep only the rows selected
   by the subsetting IF. All fit rows share sort key n2 = 40. */
Data Mod_n;
	length Model_&var $8.;
	set &num (rename=(Label= Variable  ))
		&fit (rename=(Criterion= Variable))
		&rsq (rename=(Label2=Variable))
		&ascn(rename=(Label1=Variable));
	if Variable in ("Number of Observations Used", "AIC", "Percent Concordant") or Label1 in ("R-Square");

	if Model_&var= "" then do;
		if InterceptAndCovariates ne . then Model_&var=InterceptAndCovariates;
		else if Variable ="Max-rescaled R-Square" and cValue2 ne . then Model_&var=cValue2;
		else if NobsUsed ne . then  Model_&var=NobsUsed;
		else if Variable ="Percent Concordant" and cvalue1 ne . then  Model_&var=cvalue1;
	end;
	n2=40;
	keep Variable Model_&var _name_ variable_merge n2;
run;

/* Parameter rows followed by fit rows. */
Data Mod_out_&Var;
 	length Variable  $200.;
	set Mod_t Mod_n;
	/* rename Model_&var = &DepVar._&var; (disabled) */
run;

/* Increment counter */
%let i=%eval(&i+1);
%end;
%mend;
%let temp=NatOrigInd|SkinColorInd|USBornInd;
%loop(&temp);


/* Reorder to merge: shift the sort key n2 of every parameter row other than
   Intercept and filter_rand, so that each model's covariates fall in their own
   range when the three model outputs are merged by n2. Fit rows (n2 = 40) are
   left where they are. */
data Mod_out_NatOrigInd;
	set Mod_out_NatOrigInd;
	if n2 < 40 and not (variable_merge in ("Intercept", "filter_rand"))
		then n2 = n2 + 4;
run;

data Mod_out_SkinColorInd;
	set Mod_out_SkinColorInd;
	if n2 < 40 and not (variable_merge in ("Intercept", "filter_rand"))
		then n2 = n2 + 15;
run;



/* Sort each model output by the merge keys (in place). */
proc sort data=Mod_out_NatOrigInd;
	by n2 variable_merge _name_ Variable classval0;
run;

proc sort data=Mod_out_SkinColorInd;
	by n2 variable_merge _name_ Variable classval0;
run;

proc sort data=Mod_out_USBornInd;
	by n2 variable_merge _name_ Variable classval0;
run;

/* Put the three model columns side by side. The key variables are kept
   (the DROP below is intentionally disabled). */
data mod_allInd;
	merge Mod_out_NatOrigInd
	      Mod_out_USBornInd
	      Mod_out_SkinColorInd;
	by n2 variable_merge _name_ Variable classval0;
	/* drop n2 variable_merge _name_  classval0; */
run;

proc print data=mod_allInd noobs;
run;


/*******pred probs******/

/* Stack the three predicted-probability listings and split each label into
   its parts:
     pred  which listing the row came from
     int   label text before the comma (after the colon, when there is one)
     year  label text after the comma
     type  "Separate" when year is 2020, "Combined" when year is 2030
   Only int, year, pred, est_response, l, u and type are written out. */
data out.pred_interactionsInd (keep=int year pred est_response l u type);
	set int_mod_originInd   (in=from_origin)
	    int_mod_SkinInd     (in=from_skin)
	    int_mod_NativityInd (in=from_nativity);
	length pred $20.;

	if from_origin then Pred="Origin";
	else if from_skin then Pred="Skin color";
	else if from_nativity then Pred="Nativity";

	/* Take the first non-blank label. */
	if not missing(p_SkinColor) then group=p_SkinColor;
	else if not missing(p_Origin) then group=p_Origin;
	else if not missing(p_nativity) then group=p_nativity;

	/* Text after the colon when the label has one, otherwise the whole label. */
	group2=scan(group, 2, ':');
	if not missing(group2) then group3=group2;
	else group3=group;

	int =scan(group3, 1, ',');
	year=scan(group3, 2, ',');

	if year=2020 then type="Separate";
	else if year=2030 then type="Combined";
run;


/* Combine both: merge the two formatted model tables (mod_all and mod_allInd)
   row by row on the shared sort keys, then remove the helper key columns from
   the final table. */
data Table_S3 (drop=n2 variable_merge _name_ classval0);
	merge mod_all
	      mod_allInd;
	by n2 variable_merge _name_ Variable classval0;
run;

proc print data=Table_S3 noobs;
run;
